Extended Summary

On of the most known measures for ranking countries is HDI - Human Development Index. It takes into account: life expectancy at birth (in years), expected years of schooling, mean years of schooling and gross national income per capita. In this report we decided to choose top 24 countries from Latest Human Development Index Ranking and take a closer look at additional information from World Development Indicators in their context. Moreover, we tried to find correlations between Bitcoins’ data from 2009 to 2021 and S.P.Composite dataset. Finally, short-term prediction for Bitcoin Market Price USD was made.

HDI Top

Top 24 countries according to http://hdr.undp.org/en/content/latest-human-development-index-ranking. Because Liechtenstein had many missing values in WDI dataset, we decided to change it to the next top country in the index - France.

hdiTop <- c('Norway', 'Ireland', 'Switzerland', 'Iceland', 'Hong Kong', 'Germany', 'Sweden', 'Netherlands', 'Australia', 'Denmark', 'Finland', 'Singapore', 'United Kingdom', 'New Zealand', 'Belgium', 'Canada', 'United States', 'Austria', 'Israel', 'Japan', 'Slovenia', 'Luxembourg', 'Korea, Rep.', 'Spain', 'France')

Used libraries

During generation of this report we used the following libraries:

libraries
##  [1] "dplyr"     "tidyr"     "ggplot2"   "readxl"    "imputeTS"  "vtable"   
##  [7] "skimr"     "DT"        "caret"     "gganimate" "ggpubr"

Datasets Reading

currExchRates <- read.csv("./datasets/CurrencyExchangeRates.csv")
goldPrices <- read.csv("./datasets/Gold prices.csv")
spComposite <- read.csv("./datasets/S&P Composite.csv")
worldDevInd <- read_excel("./datasets/World_Development_Indicators.xlsx", trim_ws=TRUE, na = "..")
worldDevInd <- data.frame(worldDevInd)
btcMetadata <- read.csv("./datasets/BCHAIN_metadata.csv")
btcDiff <- read.csv("./datasets/BCHAIN-DIFF.csv")
btcHrate <- read.csv("./datasets/BCHAIN-HRATE.csv")
btcMkpru <- read.csv("./datasets/BCHAIN-MKPRU.csv")
btcTrvou <- read.csv("./datasets/BCHAIN-TRVOU.csv")

Datasets Preparation

Currency Exchange Rates

In order to clean the Currency Exchange Rates Dataset we inserted 0’s in Euro currency before ’98 and used special function to fill empty individual places by making mean from adjacent values.

CurrExchRatesClean <- currExchRates %>% select(which(colMeans(is.na(.)) < 0.2))
CurrExchRatesClean <- CurrExchRatesClean %>% mutate(Euro = ifelse(Date < as.Date("1998-10-30", format="%Y-%m-%d"), 0, Euro))
CurrExchRatesClean <- CurrExchRatesClean %>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
CurrExchRatesClean <- na_ma(CurrExchRatesClean, k = 1)

st(CurrExchRatesClean)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
Australian.Dollar 5978 0.768 0.143 0.483 0.665 0.869 1.105
Bahrain.Dinar 5978 0.376 0 0.376 0.376 0.376 0.376
Brazilian.Real 5978 2.158 0.765 0.832 1.72 2.717 4.195
Canadian.Dollar 5978 1.266 0.187 0.917 1.086 1.403 1.613
Colombian.Peso 5978 2082.838 589.734 833.18 1795.37 2450.322 3434.89
Danish.Krone 5978 6.282 0.912 4.665 5.612 6.804 9.006
Euro 5978 1.007 0.476 0 0.938 1.314 1.599
Icelandic.Krona 5978 92.418 24.739 54.72 70.42 116.888 147.98
Indian.Rupee 5978 48.181 9.451 31.37 43.133 52.967 68.778
Japanese.Yen 5978 107.974 14.065 75.86 100.765 118.395 147
Korean.Won 5978 1103.062 160.752 756 1016.675 1186.8 1964.8
Kuwaiti.Dinar 5978 0.294 0.011 0.265 0.287 0.303 0.309
Libyan.Dinar 5978 1.507 0.61 0.525 0.662 1.932 1.932
Malaysian.Ringgit 5978 3.504 0.491 2.436 3.182 3.8 4.725
Nepalese.Rupee 5978 77.184 15.094 49.88 68.475 84.698 109.98
New.Zealand.Dollar 5978 0.66 0.118 0.393 0.579 0.736 0.882
Norwegian.Krone 5978 6.964 1.087 4.959 6.107 7.803 9.606
Pakistani.Rupee 5978 70.139 23.441 30.877 51.866 90.937 115.697
Qatar.Riyal 5978 3.64 0 3.64 3.64 3.64 3.64
Rial.Omani 5978 0.384 0 0.384 0.384 0.384 0.385
Saudi.Arabian.Riyal 5978 3.749 0.002 3.745 3.745 3.75 3.75
Singapore.Dollar 5978 1.503 0.183 1.201 1.361 1.688 1.851
South.African.Rand 5978 8.076 2.849 3.53 6.218 9.941 16.771
Sri.Lanka.Rupee 5978 102.232 28.663 49.57 77.54 126.29 157.646
Swedish.Krona 5978 7.731 1.084 5.843 6.832 8.376 10.995
Swiss.Franc 5978 1.211 0.249 0.725 0.979 1.389 1.823
Thai.Baht 5978 35.229 5.531 24.44 31.568 39.447 56.061
Trinidad.And.Tobago.Dollar 5978 6.309 0.18 5.839 6.26 6.376 6.789
U.A.E..Dirham 5978 3.672 0.001 3.671 3.672 3.672 3.675
U.K..Pound.Sterling 5978 1.615 0.174 1.213 1.518 1.675 2.102
U.S..Dollar 5978 1 0 1 1 1 1

Gold Prices

goldPricesClean <- goldPrices %>% mutate(
  EURO..AM. = ifelse(Date < as.Date("1998-12-31", format="%Y-%m-%d"), 0, EURO..AM.),
  EURO..PM. = ifelse(Date < as.Date("1998-12-31", format="%Y-%m-%d"), 0, EURO..PM.),
  USD..PM. = ifelse(Date < as.Date("1968-03-14", format="%Y-%m-%d"), 0, USD..PM.),
  GBP..PM. = ifelse(Date < as.Date("1968-03-14", format="%Y-%m-%d"), 0, GBP..PM.))

goldPricesClean <- goldPricesClean %>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
goldPricesClean <- na_ma(goldPricesClean, k = 1)

st(goldPricesClean)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
USD..AM. 13585 575.166 490.165 34.77 280.5 841.75 2061.5
USD..PM. 13585 574.853 490.136 0 280.3 841.1 2067.15
GBP..AM. 13585 370.554 353.367 14.485 177.63 453.906 1574.37
GBP..PM. 13585 370.381 353.32 0 177.615 453.787 1569.59
EURO..AM. 13585 337.372 477.165 0 0 576.189 1743.82
EURO..PM. 13585 337.309 477.073 0 0 576.776 1743.43

World Development Index and Top Chosen Countries

worldDevInd$Series.Name<-gsub("\\$","D",worldDevInd$Series.Name)

worldDevIndClean <- worldDevInd %>% select(-Series.Code) %>% 
  pivot_longer('X1970..YR1970.':'X2020..YR2020.', names_to = "Year", values_to = "Value", values_drop_na = FALSE) %>%
  pivot_wider(names_from = "Series.Name", values_from = Value) %>% unchop(everything())


worldDevIndClean <- worldDevIndClean %>% 
  mutate(Year = substr(Year, 2, 5)) %>% 
  filter(grepl('', Country.Code)) %>%
  relocate(Year, .after = Country.Code)


trial <- as.data.frame(worldDevIndClean)
trial <- worldDevIndClean %>% filter(Country.Name %in% hdiTop)

#trial <- trial %>% filter(Year==2020) %>% arrange(`GDP (current USD)`) 

trial <- trial %>% select(which(colMeans(is.na(.)) < 0.10))


trial[is.na(trial)] <- 0
wdiTop <- trial

datatable(skim(wdiTop %>% dplyr::select(-Country.Name, -Country.Code, -Year)) %>% dplyr::select(skim_variable, numeric.mean, numeric.sd, numeric.p0, numeric.p25, numeric.p75, numeric.p100) %>% dplyr::rename(Variable=skim_variable, Mean=numeric.mean, "Std. Dev."=numeric.sd, Min=numeric.p0, "Pctl. 25"=numeric.p25, "Pctl. 75"=numeric.p75, Max=numeric.p100))

S.P. Composite

spCompositeClean <- spComposite %>% 
  mutate(Cyclically.Adjusted.PE.Ratio = ifelse(Year < as.Date("1880-12-31", format="%Y-%m-%d"), 0, Cyclically.Adjusted.PE.Ratio)) %>% rename(Date = Year)
spCompositeClean <- spCompositeClean%>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
spCompositeClean <- na_ma(spCompositeClean, k = 1)

st(spComposite)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
S.P.Composite 1810 327.968 685.029 2.73 7.903 164.4 4493.28
Dividend 1806 6.732 12.069 0.18 0.42 7.053 59.68
Earnings 1806 15.371 28.933 0.16 0.561 14.726 158.74
CPI 1810 62.392 76.26 6.28 10.2 102.275 273.983
Long.Interest.Rate 1810 4.504 2.306 0.62 3.171 5.139 15.32
Real.Price 1810 621.987 733.091 73.899 186.575 706.991 4477.204
Real.Dividend 1806 17.498 11.114 5.445 9.417 22.301 63.511
Real.Earnings 1806 34.907 29.841 4.576 14.063 43.768 159.504
Cyclically.Adjusted.PE.Ratio 1690 17.215 7.048 4.784 11.898 20.913 44.198

Bitcoins’ Data

btcTrvou <- btcTrvou %>% rename(Trvou = Value)
btcMkpru <- btcMkpru %>% rename(Mkpru = Value)
btcDiff <- btcDiff %>% rename(Diff = Value)
btcHrate <- btcHrate %>% rename(Hrate = Value)
btcAllClean <- btcTrvou %>% left_join(btcMkpru) %>% left_join(btcHrate) %>% left_join(btcDiff)
btcAllClean <- btcAllClean %>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
btcAllClean <- na_ma(btcAllClean, k = 1)

st(btcAllClean)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
Trvou 4661 146728050.012 330500192.236 0 194787.108 148443297.885 5352015515.54
Mkpru 4661 5141.227 10990.321 0 7.2 6499.062 63554.44
Hrate 4661 26480643.671 46431207.364 0 12.016 38267606.403 198514005.71
Diff 4661 3667535953756.88 6452342784118.23 0 1689334.405 5363678461480 25046487590083

Attributes analysis

World Development Index - Top Chosen based on HDI

Bitcoins

btcAnalysis <- btcAllClean %>% pivot_longer(cols='Trvou':'Diff', names_to = 'Param', values_to = 'Value')
ggplot(btcAnalysis, aes(x=Date, y=Value, color = as.factor(Param))) + geom_line() + facet_wrap(~Param, scales = "free_y") + theme_minimal() + labs(colour = "Bitcoin's Parameter", title = "Bitcoin's Overview")

Gold Prices

GoldAnalysis <- goldPricesClean %>% pivot_longer(cols = 'USD..AM.':'EURO..PM.', names_to = 'Curr', values_to = 'Value')
ggplot(data = GoldAnalysis, aes(x = Date, y = Value, color=as.factor(Curr))) +
    geom_point(size=0.1) + facet_wrap(~Curr) + theme_minimal() + labs(color='Currency', title='Gold Prices\' Overview')

S.P. Composite

ggplot(data = spCompositeClean, aes(x = Date, y = S.P.Composite)) +
    geom_line(color='blue') + theme_minimal() + labs(title='S.P.Composite through Years') + transition_reveal(Date)

Correlations

Correlation between Hrate and Mkpru

btcPlusSP <- btcAllClean %>% inner_join(spCompositeClean, on='Date')
## Joining, by = "Date"
btcPlusGold <- btcAllClean %>% inner_join(goldPricesClean, on='Date')
## Joining, by = "Date"
ggscatter(btcAllClean, x = "Mkpru", y = "Hrate", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Mkpru", ylab = "Hrate", color = 'plum4')
## `geom_smooth()` using formula 'y ~ x'

Correlations for Bitcoins’ Data Including S.P. Composite

ggscatter(btcPlusSP, x = "Mkpru", y = "S.P.Composite", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Mkpru", ylab = "S.P.Composite", color='maroon2')
## `geom_smooth()` using formula 'y ~ x'

ggscatter(btcPlusSP, x = "Mkpru", y = "CPI", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Mkpru", ylab = "CPI", color='indianred3')
## `geom_smooth()` using formula 'y ~ x'

ggscatter(btcPlusSP, x = "Mkpru", y = "Earnings", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Mkpru", ylab = "Earnings", color='coral2')
## `geom_smooth()` using formula 'y ~ x'

Correlations for Bitcoins’ Data Including Arbitrally Chosen Indexes from WDI

btcYearsSummary <- btcAllClean %>% mutate(Year = format(as.Date(Date, format="%d/%m/%Y"),"%Y")) %>% select(-Date) %>% group_by(Year) %>% dplyr::summarize(MeanDiff = mean(Diff), MeanHrate=mean(Hrate), MeanMkpru=mean(Mkpru), MeanTrvou=mean(Trvou)) %>% filter(Year > 2008 & Year <2021)

btcPlusCountry <- btcYearsSummary %>% inner_join(wdiTop, on='Year') 
## Joining, by = "Year"
for (country in hdiTop) {
  countryExample <- btcPlusCountry %>% filter(Country.Name == country)
  check <- cor(x = countryExample$MeanMkpru, y = countryExample$`GDP per capita (current USD)`, method = "pearson", use = "everything")
  if (check < 0.7 | is.na(check)) {next}
  print(ggscatter(countryExample, x = "MeanMkpru", y = "GDP per capita (current USD)", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "MeanMkpru", ylab = "GDP per capita (current USD)", color='orchid2', title = country))
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

for (country in hdiTop) {
  countryExample <- btcPlusCountry %>% filter(Country.Name == country)
  check <- cor(x = countryExample$MeanMkpru, y = countryExample$`Urban population growth (annual %)`, method = "pearson", use = "everything")
  if (check < 0.7 | is.na(check)) {next}
  print(ggscatter(countryExample, x = "MeanMkpru", y = "Urban population growth (annual %)", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "MeanMkpru", ylab = "Urban population growth (annual %)", color='maroon1', title = country))
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

Model

set.seed(23)

btcAllClean2 <- btcAllClean %>% select(-Date)

inTraining <- createDataPartition(y = btcAllClean2$Mkpru, p = .70, list = FALSE)
train <- btcAllClean2[inTraining,]
valid_and_test  <- btcAllClean2[-inTraining,]

inTesting <- createDataPartition(y = valid_and_test$Mkpru, p = .50, list = FALSE)
valid <- valid_and_test[ -inTesting,]
test <- valid_and_test[ inTesting,]


ctrl <- trainControl(
    # powtórzona ocena krzyżowa
    method = "repeatedcv",
    # liczba podziałów
    number = 2,
    # liczba powtórzeń
    repeats = 5)


fit <- train(Mkpru ~ .,
             data = train,
             method = "rf",
             trControl = ctrl,
             # Paramter dla algorytmu uczącego
             ntree = 10)
## note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .
fit
## Random Forest 
## 
## 3265 samples
##    3 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (2 fold, repeated 5 times) 
## Summary of sample sizes: 1633, 1632, 1633, 1632, 1632, 1633, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE     
##   2     2888.579  0.9342471  853.6425
##   3     2783.717  0.9393712  762.8295
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 3.
pred <- predict(fit, newdata = test)
preddf <- data.frame(Pred = pred, Mkpru = test$Mkpru)

#View(preddf)

datatable(preddf)
print(ggscatter(preddf, x = "Mkpru", y = "Pred",
           
           xlab = "Mkpru", ylab = "Predicted Mkpru", color='maroon1') + geom_smooth())
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'